{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAO0AAABECAYAAABtXrKpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAB/klEQVR4nO3YMYrUYBzG4X+GDaLDuIWewGIu4BksvIqVILZewMrGUvAUFp7BC0xrJ6uwu2RFAvNZyFo4RhA2m331ecp8zRvIj5mka621AmKslh4A/B3RQhjRQhjRQpijqYP9fl/DMFTf99V13XVugv9aa63Gcaz1el2r1eHv6mS0wzDUbrebdRwwbbvd1mazObg+GW3f91VV9ezVmzo5PZtv2YLevnhar989X3rGbJ48flmr9x+WnjGb/aOHdbF/sPSMK9fVWLdXH382+KvJaC//Ep+cntWnL6fzrLsBTi8+Lz1hVt3Xb0tPmFWr3z/Y/4Kp11IfoiCMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCGMaCHM0dRBa62qqu4f3722MUs4vnNv6QmzardvLT1hVl2NS0+4cpf3dNngwXmbODk/P6/dbjffMuCPttttbTabg+uT0e73+xqGofq+r67rZh8I/NBaq3Eca71e12p1+AY7GS1wM/kQBWFEC2FEC2FEC2G+A7O0WWjJZuMgAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 288x72 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# load required packages\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns; sns.set()\n",
    "import statsmodels.api as sm\n",
    "import pickle\n",
    "import joblib\n",
    "from __future__ import division\n",
    "from IPython.display import display\n",
    "\n",
    "# set styling for plots\n",
    "%matplotlib inline  \n",
    "sns.set(style = \"whitegrid\")\n",
    "sns.set_palette('cubehelix',4)\n",
    "plt.rc('text', usetex=True)\n",
    "plt.rc('font', family='serif')\n",
    "sns.palplot(sns.color_palette())\n",
    "\n",
    "# define names of models\n",
    "clfs = [\"LogitNonLinear\",\"RandomForestIsotonic\"]\n",
    "outnames = {\"LogitNonLinear\": \"Nonlinear Logit\",\"RandomForestIsotonic\" : \"Random Forest\",\"xgboost_output\": \"XGBoost\",\"Difference\" : \"Difference\"}\n",
    "\n",
    "# set race categories for plots\n",
    "plotrace = [\"Asian\",\"White Non-Hispanic\",\"White Hispanic\",\"Black\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load and clean data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/psg24/opt/anaconda3/lib/python3.8/site-packages/numpy/lib/arraysetops.py:583: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
      "  mask |= (ar1 == a)\n"
     ]
    }
   ],
   "source": [
    "# set paths to saved data\n",
    "path = '../../data/' \n",
    "\n",
    "# load main dataset \n",
    "allvals = pd.read_csv(path + 'all_vals_race1_interestrate1.csv',index_col=0)\n",
    "\n",
    "# load predictions \n",
    "preds00 = pd.read_csv(path + '../output/_race0_interestrate0.csv',index_col=0) \n",
    "\n",
    "# load saved classifiers and save feature names\n",
    "clfpath = '../../output/'\n",
    "models = {}; features = {}\n",
    "for name in clfs:\n",
    "    if name.startswith('Logit'):\n",
    "        models[name] = pickle.load(open(clfpath + name + \"_race0_interestrate0.pkl\",'rb'))\n",
    "        features[name] = list(models[name].params.index)\n",
    "    else:\n",
    "        models[name] = joblib.load(open(clfpath + name + \"_race0_interestrate0.pkl\",'rb'))\n",
    "        features[name] = [x.lower() for x in list(pd.read_csv(path + 'feature_names_norace' + name + '.csv').columns)]\n",
    "features['RandomForestIsotonic'].remove('sato')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# clean race variable in main data\n",
    "racecols = [col for col in list(allvals) if col.startswith('race_dum')]\n",
    "allvals[\"White Non-Hispanic\"] = 1 -  allvals[racecols].sum(axis=1)\n",
    "allvals[\"Race\"] = allvals[racecols + [\"White Non-Hispanic\"]].idxmax(axis=1).str.replace('race_dum_','').replace('White hisp','White Hispanic')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# merge main data and predictions \n",
    "#df0 = allvals.drop(['LogitNonLinear','RandomForest','RandomForestIsotonic'], axis=\"columns\")\n",
    "df0 = allvals\n",
    "df0 = df0.join(preds00)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 4: Example heatmaps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# utility for adding fico, ltv and income bins used by logit to a dataframe\n",
    "def mkbins(df):\n",
    "    bins = pd.DataFrame(index = df.index)\n",
    "\n",
    "    fico_cuts = [0] + list(range(280,870,20))\n",
    "    fico_bin = pd.cut(df[\"fico_orig_fill\"], fico_cuts, labels = fico_cuts[0:-1], right = False).fillna(0)\n",
    "    fico_bin.loc[(fico_bin>0) & (fico_bin < 600),] = 600\n",
    "    fico_bin.loc[fico_bin==840,] = 820\n",
    "\n",
    "    ltv_cuts = list(range(20,110,5))\n",
    "    ltv_bin = pd.cut(df[\"ltv_ratio_fill\"], ltv_cuts, labels = ltv_cuts[0:-1], right = False)\n",
    "    ltv_80 = (df[\"ltv_ratio_fill\"]==80.0).astype(int)    \n",
    "\n",
    "    inc_cuts = list(range(-25,550,25))\n",
    "    income_bin = pd.cut(df[\"applicant_income\"],inc_cuts, labels = inc_cuts[0:-1],right = False)\n",
    "\n",
    "    bins = bins.join(pd.get_dummies(fico_bin, prefix = \"fico_bin_dum\"))\n",
    "    bins = bins.join(pd.get_dummies(income_bin, prefix = \"income_bin_dum\"))\n",
    "    bins = bins.join(pd.get_dummies(ltv_bin, prefix = \"ltv_bin_dum\"))\n",
    "    bins[\"ltv_80_dum_0\"] = 1 - ltv_80\n",
    "    bins['const'] = 1\n",
    "    return bins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "\"['prop_state_dum_ny', 'prop_state_dum_nh', 'prop_state_dum_mn', 'prop_state_dum_ky', 'prop_state_dum_ca', 'prop_state_dum_ut', 'prop_state_dum_ia', 'prop_state_dum_mo', 'prop_state_dum_nd', 'prop_state_dum_tx', 'prop_state_dum_de', 'jumbo_flg_dum_n', 'prop_state_dum_md', 'prop_state_dum_in', 'prop_state_dum_dc', 'prop_state_dum_id', 'prop_state_dum_ga', 'prop_state_dum_la', 'prop_state_dum_mt', 'prop_state_dum_vt', 'prop_state_dum_or', 'prop_state_dum_az', 'prop_state_dum_ms', 'prop_state_dum_mi', 'prop_state_dum_sc', 'prop_state_dum_hi', 'prop_state_dum_nc', 'prop_state_dum_ct', 'prop_state_dum_ok', 'prop_state_dum_sd', 'prop_state_dum_ks', 'prop_state_dum_il', 'loan_type_mcdash_dum_c', 'prop_state_dum_pa', 'prop_state_dum_ar', 'prop_state_dum_fl', 'prop_state_dum_nv', 'prop_state_dum_oh', 'prop_state_dum_ma', 'prop_state_dum_ri', 'prop_state_dum_tn', 'prop_state_dum_wa', 'prop_state_dum_nm', 'prop_state_dum_co', 'prop_state_dum_wv', 'prop_state_dum_me', 'prop_state_dum_ne', 'prop_state_dum_nj', 'prop_state_dum_ak', 'prop_state_dum_va', 'prop_state_dum_wi', 'prop_state_dum_al'] not in index\"",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-6-ec8765fd29a4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mclfs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     39\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Logit'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 40\u001b[0;31m         \u001b[0mpred\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     41\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     42\u001b[0m         \u001b[0mpred\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3028\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3029\u001b[0m                 \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3030\u001b[0;31m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_listlike_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mraise_missing\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3031\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3032\u001b[0m         \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_get_listlike_indexer\u001b[0;34m(self, key, axis, raise_missing)\u001b[0m\n\u001b[1;32m   1264\u001b[0m             \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1266\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_read_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mraise_missing\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mraise_missing\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1267\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1268\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_validate_read_indexer\u001b[0;34m(self, key, indexer, axis, raise_missing)\u001b[0m\n\u001b[1;32m   1314\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mraise_missing\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1315\u001b[0m                 \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1316\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{not_found} not in index\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1317\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1318\u001b[0m             \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: \"['prop_state_dum_ny', 'prop_state_dum_nh', 'prop_state_dum_mn', 'prop_state_dum_ky', 'prop_state_dum_ca', 'prop_state_dum_ut', 'prop_state_dum_ia', 'prop_state_dum_mo', 'prop_state_dum_nd', 'prop_state_dum_tx', 'prop_state_dum_de', 'jumbo_flg_dum_n', 'prop_state_dum_md', 'prop_state_dum_in', 'prop_state_dum_dc', 'prop_state_dum_id', 'prop_state_dum_ga', 'prop_state_dum_la', 'prop_state_dum_mt', 'prop_state_dum_vt', 'prop_state_dum_or', 'prop_state_dum_az', 'prop_state_dum_ms', 'prop_state_dum_mi', 'prop_state_dum_sc', 'prop_state_dum_hi', 'prop_state_dum_nc', 'prop_state_dum_ct', 'prop_state_dum_ok', 'prop_state_dum_sd', 'prop_state_dum_ks', 'prop_state_dum_il', 'loan_type_mcdash_dum_c', 'prop_state_dum_pa', 'prop_state_dum_ar', 'prop_state_dum_fl', 'prop_state_dum_nv', 'prop_state_dum_oh', 'prop_state_dum_ma', 'prop_state_dum_ri', 'prop_state_dum_tn', 'prop_state_dum_wa', 'prop_state_dum_nm', 'prop_state_dum_co', 'prop_state_dum_wv', 'prop_state_dum_me', 'prop_state_dum_ne', 'prop_state_dum_nj', 'prop_state_dum_ak', 'prop_state_dum_va', 'prop_state_dum_wi', 'prop_state_dum_al'] not in index\""
     ]
    }
   ],
   "source": [
    "# make predictions on fico/income grid in figure\n",
    "\n",
    "# grid of income / fico\n",
    "inc_grid = np.linspace(20,200,50)\n",
    "fic_grid = np.linspace(650,820,50)\n",
    "inc_list = [x for x in inc_grid for y in fic_grid]\n",
    "fic_list = [y for x in inc_grid for y in fic_grid]\n",
    "\n",
    "# set up grid\n",
    "G = len(inc_grid)*len(fic_grid)\n",
    "K = len(features[\"RandomForestIsotonic\"])\n",
    "x = pd.DataFrame(np.zeros((G,K)),columns = features[\"RandomForestIsotonic\"])\n",
    "\n",
    "# insert income/fico grids\n",
    "x[\"applicant_income\"]=inc_list\n",
    "x[\"fico_orig_fill\"]=fic_list\n",
    "\n",
    "# hard code the remaining variables as explained in paper \n",
    "ltv = 79.99 # exactly 80 doesn't work\n",
    "amt = 300000\n",
    "x[\"ltv_ratio_fill\"] = ltv\n",
    "x[\"orig_amt\"]=amt \n",
    "x[\"log_orig_amt\"]=np.log(amt)\n",
    "# x[\"sato\"] = 0\n",
    "x[\"occupancy_type_dum_1\"]= 1\n",
    "x[\"investor_type_dum_2\"] = 1\n",
    "x[\"loan_purpose_dum_1\"] = 1\n",
    "x[\"orig_year_dum_2011\"] = 1\n",
    "x[\"prop_state_dum_CA\"] = 1\n",
    "x[\"prop_state_dum_TX\"] = 0\n",
    "x[\"cur_int_rate\"] = 4.5\n",
    "\n",
    "xbins = mkbins(x)\n",
    "x = x.join(xbins)\n",
    "\n",
    "# predictions for PD contour plot \n",
    "pred = pd.DataFrame(index = x.index, columns = clfs)\n",
    "for name in clfs:\n",
    "    if name.startswith('Logit'): \n",
    "        pred[name] = models[name].predict(x[features[name]]) \n",
    "    else:\n",
    "        pred[name] = models[name].predict_proba(x[features[name]])[:,1]\n",
    "        \n",
    "# get group densities on grid\n",
    "race = df0['Race']\n",
    "inc_grid1 = np.linspace(20,200,10)\n",
    "fic_grid1 = np.linspace(650,820,10)\n",
    "heat = pd.DataFrame(index = allvals.index, columns = [\"Income\",\"FICO\",\"Minority\"])\n",
    "heat[\"Income\"] = pd.cut(allvals[\"applicant_income\"],inc_grid1,labels = inc_grid1[1:].astype('int'))\n",
    "heat[\"FICO\"] = pd.cut(allvals[\"fico_orig_fill\"],fic_grid1,labels = fic_grid1[1:].astype('int'))\n",
    "# heat[\"Minority\"] = ((race == \"Black\") | (race == \"White Hispanic\")).astype('int')\n",
    "heat[\"Minority\"] = (race == \"Black\").astype('int')\n",
    "heat[\"Majority\"] = (race == \"White Non-Hispanic\").astype('int')\n",
    "inrange = heat[\"Income\"].notnull() & heat[\"FICO\"].notnull()\n",
    "heat = heat[inrange]\n",
    "piv = {}\n",
    "piv['Black'] = pd.pivot_table(data = heat, index = \"FICO\", columns = \"Income\"\n",
    "                            , values = \"Minority\", fill_value = 0,\n",
    "                            aggfunc = (lambda x: x.sum()/heat[\"Minority\"].sum()))\n",
    "piv['White Non-Hispanic'] = pd.pivot_table(data = heat, index = \"FICO\", columns = \"Income\"\n",
    "                            , values = \"Majority\", fill_value = 0,\n",
    "                            aggfunc = (lambda x: x.sum()/heat[\"Majority\"].sum()))\n",
    "\n",
    "# draw contour plots\n",
    "X,Y = np.meshgrid(inc_grid,fic_grid)\n",
    "fig,ax = plt.subplots(2,len(clfs),figsize = (10,8))\n",
    "ext = [inc_grid1.min(),inc_grid1.max(), \n",
    "       fic_grid1.min(),fic_grid1.max()]\n",
    "cvals = [[0.1,0.2,0.3,0.5],[0.2,0.3,0.6,1,1.3,1.6]]\n",
    "for (i,name) in enumerate(clfs):\n",
    "    for (j,g) in enumerate(piv.keys()[::-1]):\n",
    "        # dist for group j\n",
    "        ax[i,j].imshow(piv[g][::-1],cmap=\"Purples\",extent = ext,alpha = 0.6)\n",
    "        \n",
    "        # contour for clf i overlaid\n",
    "        Z = pred[name].values.reshape(X.shape)\n",
    "       \n",
    "        CS = ax[i,j].contour(X,Y,100*Z,cvals[i],cmap=None,colors='k')\n",
    "        plt.clabel(CS, fontsize=14,inline=True,fmt='%1.1f')\n",
    "        \n",
    "#         ax[i,0].set_ylabel('%s' % (outnames[name]),fontsize=18)\n",
    "        ax[i,j].set_xlabel('Income',fontsize = 14)\n",
    "        ax[i,j].set_ylabel('FICO',fontsize = 14)\n",
    "\n",
    "\n",
    "        ax[i,j].set_title('%s: %s' % (outnames[name],g),fontsize=15)\n",
    "\n",
    "\n",
    "        ax[i,j].grid(False)\n",
    "        ax[i,j].tick_params(labelsize = 16)\n",
    "\n",
    "\n",
    "plt.tight_layout(pad=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 5: CDFs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# utility for making cdf plots (diff of level or log PD)\n",
    "def cdf_pd_diff(df,race,plotrace,clfs,lim = (-1,1),log=False):\n",
    "    fig, ax = plt.subplots(1,1,figsize=(7.5,5))\n",
    "    if log:\n",
    "        diff = df[clfs[1]].apply(np.log) - df[clfs[0]].apply(np.log)\n",
    "        factor = 1\n",
    "    else:\n",
    "        diff = df[clfs[1]] - df[clfs[0]]\n",
    "        factor = 100\n",
    "    for group in plotrace:\n",
    "        x = np.sort(factor*(diff[race==group]))\n",
    "        y = np.linspace(1,x.shape[0],x.shape[0]) / x.shape[0]\n",
    "        ax.plot(x,y,label=group)\n",
    "    ax.set_xlim(lim); ax.set_ylim((0,1))\n",
    "    ax.set_xticks([-1,-0.5,0,0.5,1])\n",
    "#     ax.set_yticks([0,0.25,0.5,0.75,1])\n",
    "    ax.axvline(0,color='k')\n",
    "    ax.axhline(0.5,color='k',linestyle='--')\n",
    "    plt.xticks(fontsize=16); plt.yticks(fontsize = 16)\n",
    "    if log: \n",
    "        ax.set_xlabel('Log(PD from %s) - Log(PD from %s)' %(outnames[clfs[1]],outnames[clfs[0]]),fontsize = 18)\n",
    "    else:\n",
    "        ax.set_xlabel('PD from %s - PD from %s' %(outnames[clfs[1]],outnames[clfs[0]]),fontsize = 18)\n",
    "    ax.set_ylabel('Cumulative Share',fontsize=18)\n",
    "    ax.legend(frameon=True,framealpha=1,fontsize = 16,loc='lower right')\n",
    "\n",
    "# make figures\n",
    "race = df0['Race']\n",
    "cdf_pd_diff(df0,race,plotrace,clfs,lim = (-1,1),log=False)\n",
    "cdf_pd_diff(df0,race,plotrace,clfs,lim = (-1.5,1.5),log=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Table VI: Decomposition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import scores\n",
    "score00 = pd.read_csv(path + '../output/eval_output_race0_interestrate0.csv',index_col=0)\n",
    "score10 = pd.read_csv(path + '../output/eval_output_race1_interestrate0.csv',index_col=0)\n",
    "display(score00)\n",
    "display(score10)\n",
    "\n",
    "# decompositions using saved scores\n",
    "x_part = score00.loc[clfs[0]] - score00.loc[clfs[1]]\n",
    "total = score00.loc[clfs[0]] - score10.loc[clfs[1]]\n",
    "g_part = score00.loc[clfs[0]] - score10.loc[clfs[0]]\n",
    "\n",
    "racefirst = (100*pd.DataFrame({\"Race\": g_part / total, \"Technology\": 1 - g_part/ total})).astype('float').round(2)\n",
    "\n",
    "techfirst = (100*pd.DataFrame({\"Technology\": x_part/ total , \"Race\": 1 - x_part / total})).astype('float').round(2)\n",
    "techfirst = techfirst[[\"Technology\",\"Race\"]]\n",
    "\n",
    "print('adding race first')\n",
    "display(racefirst)\n",
    "print('adding technology first')\n",
    "display(techfirst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
